
**************************
* DESCRIPTIVE STATISTICS *
**************************

* Labussiere and Vink (2020) 
* Replication Data for: The intergenerational impact of naturalisation reforms: 
* the citizenship status of children of immigrants in the Netherlands, 1995-2016
* Last modification: 22/01/2020

* This do-file describes the commands used to produce 
* 	- Figures 2 and 3 (manuscript)
*	- Tables A1, A2, A3 (appendix)
* STATA version: 14

* IN: 6_final_addvar.dta (available within CBS secured environment upon request)
* See our Synthesis "Data construction" for information about this dataset (p.11)


* Identifying variables:
* RINPERSOON___ Personal individual identifier  (9-digit)
* YEAR_________ Year of observation (4-digit)
* NEW_IND______ Dummy = 1 at the respondent's first year of observation
* TAG__________ Dummy = 1 when year = last year of observation + 1
*		 (artificial year used in the Survival Analysis to create time intervals)
* Other variables are described in the codebook.


	**************
	* MANUSCRIPT *
	**************
	

************
* FIGURE 2 *
************
* Load the person-year file and discard the artificial year that was added
* for the survival analysis (tag == 1).
use 6_final_addvar.dta, clear
drop if tag == 1

* One dummy per category of sum_nat_t5, missing codes included.
* D_sumnat1, D_sumnat2 and D_sumnat3 are summed below; the mapping of these
* dummies to legal statuses follows the sort order of sum_nat_t5
* (presumably two regular values followed by the .b code -- see the codebook).
tabulate sum_nat_t5, missing generate(D_sumnat)

* Dummy = 1 for every row whose sum_nat_t5 is not the extended missing code .b
generate D_sumnatdiffD = (sum_nat_t5 != .b)

* year_birth equals date_birth on the row where year == date_birth, 0 elsewhere
generate year_birth = cond(year == date_birth, date_birth, 0)

* Row counter, summed into the group total
generate Nyear = 1

* Totals by year_birth
collapse (sum) Nyear ///
	diff_from_Dutch   = D_sumnatdiffD ///
	no_naturalisation = D_sumnat1 ///
	naturalisation    = D_sumnat2 ///
	dutch_from_birth  = D_sumnat3, ///
	by(year_birth)

* The group year_birth == 0 gathers all rows where year != date_birth
drop if year_birth == 0

* Shares of each legal status among all rows of the group (in %)
generate per_no_naturalisation = (no_naturalisation/Nyear)*100
generate per_naturalisation    = (naturalisation/Nyear)*100
generate per_dutch_from_birth  = (dutch_from_birth/Nyear)*100

* Naturalisations relative to the rows with sum_nat_t5 != .b (in %)
generate nat_rate = (naturalisation/diff_from_Dutch)*100

* labels
label variable per_no_naturalisation "Individuals who are not Dutch citizens "
label variable per_naturalisation "Individuals who naturalise over the period"
label variable per_dutch_from_birth "Individuals who are Dutch citizens from birth"
label variable nat_rate "Naturalisation rate"

* FIGURE 2 output: one connected line per legal status, by year of birth
graph twoway ///
	(connected per_dutch_from_birth year_birth, msymbol(square) color("187 85 102")) ///
	(connected per_no_naturalisation year_birth, msymbol(triangle) color("0 68 136")) ///
	(connected per_naturalisation year_birth, color("0 0 0")), ///
	xtitle("Year of birth", height(4)) ///
	ytitle("Percentage (%)", height(6)) ///
	graphregion(color(white)) ///
	xtick(1995(1)2010) ///
	legend(cols(1) region(lcolor(white)))


************
* FIGURE 3 *
************
* Load the person-year file and discard the artificial year that was added
* for the survival analysis (tag == 1).
use 6_final_addvar.dta, clear
drop if tag == 1

* One dummy per non-missing category of nat_with_whom, built on the full
* file so that the dummy numbering does not depend on the sample restrictions
* below. D_natwwh1, D_natwwh2 and D_natwwh3 are summed further down.
tabulate nat_with_whom, generate(D_natwwh)

* Restrict to rows with sum_nat_t5 == 1
keep if sum_nat_t5 == 1

* Keep only individuals who naturalised before age 18: rows with
* age_nat_t5 of 18 or more (or missing) are removed.
keep if age_nat_t5 < 18

* year_nat equals date_nat_t5 on the row where ind_nat_t5 == 1, 0 elsewhere
generate year_nat = cond(ind_nat_t5 == 1, date_nat_t5, 0)

* Totals by year_nat
collapse (sum) ///
	Nyear_nat    = ind_nat_t5 ///
	mother_only  = D_natwwh1 ///
	father_only  = D_natwwh2 ///
	both_parents = D_natwwh3, ///
	by(year_nat)

* Share of each co-naturalisation type among the naturalisations of the year (in %)
generate per_mother_only  = (mother_only/Nyear_nat)*100
generate per_father_only  = (father_only/Nyear_nat)*100
generate per_both_parents = (both_parents/Nyear_nat)*100

* The group year_nat == 0 gathers all rows where ind_nat_t5 != 1
drop if year_nat == 0

* labels
label variable per_mother_only "Naturalisation with the mother only"
label variable per_father_only "Naturalisation with the father only"
label variable per_both_parents "Naturalisation with both parents"

* FIGURE 3 output: one connected line per co-naturalisation type, by year
graph twoway ///
	(connected per_mother_only year_nat, msymbol(square) color("187 85 102")) ///
	(connected per_father_only year_nat, msymbol(X) color("0 68 136")) ///
	(connected per_both_parents year_nat, color("0 0 0")), ///
	xtitle("Year of naturalisation", height(5)) ///
	ytitle("Percentage of co-naturalisations (%)", height(6)) ///
	graphregion(color(white)) ///
	xtick(1996(1)2016) ///
	legend(cols(1) region(lcolor(white)))


	**************
	* APPENDIX *
	**************

	
************
* TABLE A1 *
************
* Load the person-year file and discard the artificial year that was added
* for the survival analysis (tag == 1). All tabulations below use one row per
* individual (new_ind == 1) and display missing values.
use 6_final_addvar.dta, clear
drop if tag == 1 /*remove artificial year added for the survival analysis */

* gender
tab gbageslacht if new_ind == 1, miss

* parents SES
tab secm_pa_agg2 if new_ind == 1, miss
tab secm_ma_agg2 if new_ind == 1, miss

* EU origin country
tab EU_sum_child if new_ind == 1, miss 

* country of origin
tab country_of_origin if new_ind == 1, miss
tab country_of_origin_bis if new_ind == 1, miss

	* dummy: the father's origin country is the same as the mother's
	* (row level; missing when either parent's origin is missing)
	gen same_countryH = (gbaherkomstgroepering_ma == gbaherkomstgroepering_pa)
	replace same_countryH = . if missing(gbaherkomstgroepering_ma) | missing(gbaherkomstgroepering_pa)
	* Per-individual sum of the row-level dummy. The -missing- option is
	* required: without it egen returns 0 (not missing) for an individual whose
	* rows are all missing, which would silently code that individual as
	* "different countries" and make the missing-value replace below a no-op.
	bys rinpersoon: egen same_countryS = total(same_countryH), missing
	* individual-level dummy = 1 if the parents share an origin country in at
	* least one observed year; missing when no year has information
	gen same_country = (same_countryS > 0)
	replace same_country = . if missing(same_countryS)
	tab same_country if new_ind == 1, miss
	* Authors' note from the original run: 88% of children have parents who
	* come from the same origin country. NOTE(review): re-check this figure,
	* since children without parental origin information are now reported as
	* missing instead of being counted in the 0 category.

* legal status
tab sum_nat_t5 if new_ind == 1, miss

	
************
* TABLE A2 *
************
* Table A2 uses the data currently in memory (the file loaded for Table A1,
* artificial year already dropped) and one row per individual (new_ind == 1).

* sample A - eligibility period
* (rows with sum_nat_t5 equal to .b are excluded; missing categories are displayed)
tabulate period_eli_child6 if sum_nat_t5 != .b & new_ind == 1, missing

* sample B - legal status
* (restricted to one_eli_child == 1)
tabulate sum_nat_t5 if one_eli_child == 1 & new_ind == 1

* sample B - dummy both parents are eligible
tabulate sum_bothp_eli if one_eli_child == 1 & sum_nat_t5 != .b & new_ind == 1

* sample C
* (restricted to sum_nat_t5 == 1: with whom the individual naturalised, then natsup18)
foreach v in nat_with_whom natsup18 {
	tabulate `v' if sum_nat_t5 == 1 & new_ind == 1
}


************
* TABLE A3 *
************
* Cross-tabulation of country of origin by sum_nat_t5, one row per individual
* (new_ind == 1), missing categories included: first as frequencies ...
tabulate country_of_origin sum_nat_t5 if new_ind == 1, missing
* ... then as row percentages only
tabulate country_of_origin sum_nat_t5 if new_ind == 1, missing row nofreq
